##############################################################################################################
# Purpose: This file provides replication code for re-assignment of treatment among vendors who already received Intervention 1 in the Nigeria Metaketa study.
# 	     The initial assignment is thus valid for the pure control only. This re-assignment is valid for the group receiving Intervention 1 only, and the 4 
#	     conditions of the 2 x 2 factorial design specifying which type (message and mode of delivery) of Intervention 2 will be received. Collective Treatment 
#	     is also assigned to half of all vendors receiving each condition in Intervention 2.
# By: Janica Magat
# Date created: August 27, 2018
# Last updated: November 13, 2020
##############################################################################################################

rm(list = ls()) # command to clear global environment

# set working directory
setwd("~/Dropbox/Lagos EGAP Metaketa Co-PI folder/05 Analyses/Final Data Analysis/GottliebLebasMagat_LagosTax_Replication")

# Load Necessary Packages -------------------------------------------------

# list of packages for session
.packages = c("haven", "tidyverse", "Hmisc", "magrittr", "blockTools", "epiDisplay")

# install CRAN packages (if not already installed)
.inst <- .packages %in% installed.packages()
if(length(.packages[!.inst]) > 0) install.packages(.packages[!.inst])

# load packages into session 
lapply(.packages, require, character.only=TRUE)

# Part 1: Compliers ONLY --------------------------------------------------

# Load datasets -----------------------------------------------------------

### load original dataset used for intervention 1
load("Data/lagostax_int1assg.rda")

### load kobo data
load("Data/lagostax_intervention.rda")

### load phone booth data
load("Data/lagostax_phonestatus.rda")

# Merge datasets -----------------------------------------------------------

require(plyr)
intervention1 <- join_all(list(d_fin, kobo.final, phone.booth), by = "SbjNum", type = 'full', match = 'all')

# check for duplicates
n_occur <- data.frame(table(intervention1$SbjNum))
n_occur[n_occur$Freq > 1,]

# Create indicators for vendors reached -----------------------------------

intervention1$phone_booth <- NA
intervention1$phone_booth <- ifelse(intervention1$Attempt1_Status == "Agreed" | intervention1$Attempt2_Status == "Agreed" | intervention1$Attempt3_Status == "Agreed" | intervention1$Attempt5_Status == "Agreed" | intervention1$Attempt5_Status.1 == "Agreed" | intervention1$status == "completed" | intervention1$status == "about to visit" | intervention1$status == "rescheduled", "agreed/about to visit", intervention1$phone_booth)

intervention1$phone_booth <- ifelse(is.na(intervention1$phone_booth) & intervention1$Attempt1_Status == "Callback" | intervention1$Attempt2_Status == "Callback" | intervention1$Attempt3_Status == "Callback" | intervention1$Attempt5_Status == "Callback" | intervention1$Attempt5_Status.1 == "Callback" | intervention1$Attempt1_Status == "Wrong Vendor" | intervention1$Attempt2_Status == "Wrong Vendor" | intervention1$Attempt3_Status == "Wrong Vendor" | intervention1$Attempt5_Status == "Wrong Vendor" | intervention1$Attempt5_Status.1 == "Wrong Vendor" | intervention1$Attempt1_Status == "Wrong Number" | intervention1$Attempt2_Status == "Wrong Number" | intervention1$Attempt3_Status == "Wrong Number" | intervention1$Attempt5_Status == "Wrong Number" | intervention1$Attempt5_Status.1 == "Wrong Number", "callback/no appointment", intervention1$phone_booth)

#table(intervention1$phone_booth, useNA = "ifany")

# Generate indicator for compliance ---------------------------------------

intervention1$kobo <- as.character(intervention1$kobo)
intervention1$phone_booth <- as.character(intervention1$phone_booth)

intervention1$compliers <- NA
intervention1$compliers <- ifelse(intervention1$kobo == "yes" | intervention1$phone_booth == "agreed/about to visit", 1, intervention1$compliers)

#table(intervention1$compliers, useNA = "ifany")

# Step 1: Drop if intervention1_treatment == "C" or if kobo != "yes".

int1 <- intervention1[ which(intervention1$treatment != "C" & intervention1$compliers == 1), ]

# check for duplicates
n_occur <- data.frame(table(int1$SbjNum))
n_occur[n_occur$Freq > 1,]
   

### block
set.seed(90210)
data_block <- block(data = int1, n.tr = 3,
                    id.vars = c("SbjNum", "s3"), 
                    block.vars = c("market_size", "trust", "friends_mkt", "yoruba", "rent_dummy", "LIRS_connect", "club"), algorithm = "optGreedy")

### assign one member of each blocked unit to treatment/control
assg <- assignment(data_block, seed = 90210)

### assign block identifiers
int1$block_id <- createBlockIDs(data_block, data = int1, id.var = "SbjNum")

#View(int1[, c("SbjNum", "s3", "block_id")])

### add the treatment group number
l <- data.frame(assg[1])
names(l)[c(1:3)] <- c("assg.1_Treatment1", "assg.1_Treatment2", "assg.1_Treatment3")

l.long <- reshape(l, varying = c("assg.1_Treatment1", "assg.1_Treatment2", "assg.1_Treatment3"), 
                  direction = "long", sep = "_")

names(l.long)[c(2:3)] <- c("treatment", "SbjNum")
l.long$group <- NA
l.long$group <- ifelse(l.long$treatment == "Treatment1", "G1", l.long$group) 
l.long$group <- ifelse(l.long$treatment == "Treatment2", "G2", l.long$group)
l.long$group <- ifelse(l.long$treatment == "Treatment3", "G3", l.long$group)

names(l.long)[1] <- "max_dist_block1" # largest of the multivariate distances between all possible pairs in the block
group <- l.long[, c(1, 3, 5)]
group$SbjNum <- as.character(group$SbjNum)

### add assignment to the main dataframe
require(plyr)
block1 <- join(int1, group, by = "SbjNum", type = 'left', match = 'all')

#View(block1[, c("SbjNum", "s3", "block_id", "group")])
#table(block1$group, useNA = "ifany")

names(block1)

# save dataset
save(block1, file = "Treatment Assignment/Output Data/int2_block1.rda")

### calculate balance statistics
block1$group <- as.character(block1$group)

testvar <- names(block1)[names(block1) != "group"]

anovaresults <- lapply(testvar, function(varname){
  
  tryCatch({res.aov <- aov(as.formula(paste(varname, "group", sep = " ~ ")), data = block1)
  
  aov.tukey <- TukeyHSD(res.aov)
  
  resultstable <- data.frame(variable = varname, 
                             comparison = rownames(aov.tukey$group),
                             anova.p.value = summary(res.aov)[[1]][["Pr(>F)"]][1],
                             aov.tukey$group)
  
  }, error=function(e){})
  
})

anovaresults <- do.call(rbind, anovaresults)

anovaresults$p_sig 
anovaresults$p_sig[anovaresults$p.adj < 0.1 & anovaresults$p.adj > 0.05] <- "*" 
anovaresults$p_sig[anovaresults$p.adj < 0.05 & anovaresults$p.adj > 0.001] <- "**" 
anovaresults$p_sig[anovaresults$p.adj < 0.001] <- "***" 


# Step 3: Within G2, block-randomize everyone into T2, T3, T4, and T5 with equal probability

g2 <- block1[ which(block1$group == "G2"), ]

### block
set.seed(90210)
data_block <- block(data = g2, n.tr = 4,
                    id.vars = c("SbjNum", "s3"), 
                    block.vars = c("market_size", "trust", "friends_mkt", "yoruba", "rent_dummy", "LIRS_connect", "club"), algorithm = "optGreedy")

### assign one member of each blocked unit to treatment/control
assg <- assignment(data_block, seed = 90210)

### assign block identifiers
g2$block_id <- createBlockIDs(data_block, data = g2, id.var = "SbjNum")

#View(g2[, c("SbjNum", "s3", "block_id")])

### add the treatment group number
l <- data.frame(assg[1])
names(l)[c(1:4)] <- c("assg.1_Treatment1", "assg.1_Treatment2", "assg.1_Treatment3", "assg.1_Treatment4")

l.long <- reshape(l, varying = c("assg.1_Treatment1", "assg.1_Treatment2", "assg.1_Treatment3", "assg.1_Treatment4"), 
                  direction = "long", sep = "_")

names(l.long)[c(2:3)] <- c("treatment", "SbjNum")
#table(l.long$treatment, useNA = "ifany")

l.long$int2_g2 <- NA
l.long$int2_g2 <- ifelse(l.long$treatment == "Treatment1", "T2", l.long$int2_g2)
l.long$int2_g2 <- ifelse(l.long$treatment == "Treatment2", "T3", l.long$int2_g2)
l.long$int2_g2 <- ifelse(l.long$treatment == "Treatment3", "T4", l.long$int2_g2)
l.long$int2_g2 <- ifelse(l.long$treatment == "Treatment4", "T5", l.long$int2_g2)

names(l.long)[1] <- "max_dist_int2_g2" # largest of the multivariate distances between all possible pairs in the block
g2 <- l.long[, c(1, 3, 5)]
g2$SbjNum <- as.character(g2$SbjNum)

### add assignment to the main dataframe
require(plyr)
block1_g2 <- join(block1, g2, by = "SbjNum", type = 'right', match = 'all')

#View(block1_g2[, c("SbjNum", "s3", "market_size", "group", "int2_g2")])
#table(block1_g2$group, useNA = "ifany") # T2 and T3 empty

g2.assign <- block1_g2[ which(is.na(block1_g2$SbjNum) == F), ]
#table(g2.assign$group, useNA = "ifany")

# save dataset
# this dataset contains treatment assignment for vendors that are in "G2"
save(g2.assign, file = "Treatment Assignment/Output Data/g2_assign.rda")


# Step 4: Within G3, block-randomize everyone into T2, T3, T4, and T5 with equal probability

g3 <- block1[ which(block1$group == "G3"), ]

### block
set.seed(90210)
data_block <- block(data = g3, n.tr = 4,
                    id.vars = c("SbjNum", "s3"), 
                    block.vars = c("market_size", "trust", "friends_mkt", "yoruba", "rent_dummy", "LIRS_connect", "club"), algorithm = "optGreedy")

### assign one member of each blocked unit to treatment/control
assg <- assignment(data_block, seed = 90210)

### assign block identifiers
g3$block_id <- createBlockIDs(data_block, data = g3, id.var = "SbjNum")

#View(g3[, c("SbjNum", "s3", "block_id")])

### add the treatment group number
l <- data.frame(assg[1])
names(l)[c(1:4)] <- c("assg.1_Treatment1", "assg.1_Treatment2", "assg.1_Treatment3", "assg.1_Treatment4")

l.long <- reshape(l, varying = c("assg.1_Treatment1", "assg.1_Treatment2", "assg.1_Treatment3", "assg.1_Treatment4"), 
                  direction = "long", sep = "_")

names(l.long)[c(2:3)] <- c("treatment", "SbjNum")
#table(l.long$treatment, useNA = "ifany")

l.long$int2_g3 <- NA
l.long$int2_g3 <- ifelse(l.long$treatment == "Treatment1", "T2", l.long$int2_g3)
l.long$int2_g3 <- ifelse(l.long$treatment == "Treatment2", "T3", l.long$int2_g3)
l.long$int2_g3 <- ifelse(l.long$treatment == "Treatment3", "T4", l.long$int2_g3)
l.long$int2_g3 <- ifelse(l.long$treatment == "Treatment4", "T5", l.long$int2_g3)

names(l.long)[1] <- "max_dist_int2_g3" # largest of the multivariate distances between all possible pairs in the block
g3 <- l.long[, c(1, 3, 5)]
g3$SbjNum <- as.character(g3$SbjNum)

### add assignment to the main dataframe
require(plyr)
block1_g3 <- join(block1, g3, by = "SbjNum", type = 'right', match = 'all')

#View(block1_g3[, c("SbjNum", "s3", "market_size", "group", "int2_g3")])
#table(block1_g3$group, useNA = "ifany") 

g3.assign <- block1_g3[ which(is.na(block1_g3$SbjNum) == F), ]
#table(g3.assign$group, useNA = "ifany")

# save dataset
# this dataset contains treatment assignment for vendors that are in "G3"
save(g3.assign, file = "Treatment Assignment/Output Data/g3_assign.rda")

### append the datasets g2.assign and g3.assign together

names(g2.assign)[264] <- "max_dist_int2"
names(g3.assign)[264] <- "max_dist_int2"
names(g2.assign)[265] <- "int2_treatment"
names(g3.assign)[265] <- "int2_treatment"

int2_treatment <- rbind(g2.assign, g3.assign)

#View(int2_treatment[, c("SbjNum", "s3", "market_size", "group", "int2_treatment")])

#table(int2_treatment$int2_treatment, useNA = "ifany")
#table(g2.assign$int2_treatment, useNA = "ifany")
#table(g3.assign$int2_treatment, useNA = "ifany")

# save dataset
# this dataset contains intervention 2 treatment assignment for vendors (note: no collective )
save(int2_treatment, file = "Treatment Assignment/Output Data/int2_treatment.rda")

### calculate balance statistics
int2_treatment$int2_treatment <- as.character(int2_treatment$int2_treatment)

testvar <- names(int2_treatment)[names(int2_treatment) != "int2_treatment"]

anovaresults <- lapply(testvar, function(varname){
  
  tryCatch({res.aov <- aov(as.formula(paste(varname, "int2_treatment", sep = " ~ ")), data = int2_treatment)
  
  aov.tukey <- TukeyHSD(res.aov)
  
  resultstable <- data.frame(variable = varname, 
                             comparison = rownames(aov.tukey$int2_treatment),
                             anova.p.value = summary(res.aov)[[1]][["Pr(>F)"]][1],
                             aov.tukey$int2_treatment)
  
  }, error=function(e){})
  
})

anovaresults <- do.call(rbind, anovaresults)

anovaresults$p_sig 
anovaresults$p_sig[anovaresults$p.adj < 0.1 & anovaresults$p.adj > 0.05] <- "*" 
anovaresults$p_sig[anovaresults$p.adj < 0.05 & anovaresults$p.adj > 0.001] <- "**" 
anovaresults$p_sig[anovaresults$p.adj < 0.001] <- "***" 


# Step 5: All vendors in G2 will receive collective treatment and all vendors in G3 will not

# check for duplicates
n_occur <- data.frame(table(int2_treatment$SbjNum))
n_occur[n_occur$Freq > 1,]

#View(int2_treatment[, c("SbjNum", "group", "int2_treatment")])


# assign all observations in "G2" to the collective treatment
int2_treatment$collective <- NA
int2_treatment$collective <- ifelse(int2_treatment$group == "G2", "CA", int2_treatment$collective)

# assign all observations in "G3" to the collective control
int2_treatment$collective <- ifelse(int2_treatment$group == "G3", "No CA", int2_treatment$collective)

# tabulate 
#table(int2_treatment$collective, useNA = "ifany")

# create visit status
int2_treatment$visit_status <- ifelse(int2_treatment$kobo == "yes", "completed", NA)
int2_treatment$visit_status <- ifelse(is.na(int2_treatment$kobo), int2_treatment$phone_booth, int2_treatment$visit_status)

#table(int2_treatment$visit_status, useNA =)

### save datasets
save(int2_treatment, file = "Treatment Assignment/Output Data/int2_treatment_CA_AllMarkets.rda")

# Part 2: Reached but not compliers ---------------------------------------

# Load datasets -----------------------------------------------------------

rm(list = ls())

### load original dataset used for intervention 1
load("Data/lagostax_int1assg.rda")

### load kobo data
load("Data/lagostax_intervention.rda")

### load phone booth data
load("Data/lagostax_phonestatus.rda")

# Merge datasets -----------------------------------------------------------

require(plyr)
intervention1 <- join_all(list(d_fin, kobo.final, phone.booth), by = "SbjNum", type = 'full', match = 'all')

# check for duplicates
n_occur <- data.frame(table(intervention1$SbjNum))
n_occur[n_occur$Freq > 1,]

# Create indicators for vendors reached -----------------------------------

intervention1$phone_booth <- NA
intervention1$phone_booth <- ifelse(intervention1$Attempt1_Status == "Agreed" | intervention1$Attempt2_Status == "Agreed" | intervention1$Attempt3_Status == "Agreed" | intervention1$Attempt5_Status == "Agreed" | intervention1$Attempt5_Status.1 == "Agreed" | intervention1$status == "completed" | intervention1$status == "about to visit" | intervention1$status == "rescheduled", "agreed/about to visit", intervention1$phone_booth)

intervention1$phone_booth <- ifelse(is.na(intervention1$phone_booth) & intervention1$Attempt1_Status == "Callback" | intervention1$Attempt2_Status == "Callback" | intervention1$Attempt3_Status == "Callback" | intervention1$Attempt5_Status == "Callback" | intervention1$Attempt5_Status.1 == "Callback" | intervention1$Attempt1_Status == "Wrong Vendor" | intervention1$Attempt2_Status == "Wrong Vendor" | intervention1$Attempt3_Status == "Wrong Vendor" | intervention1$Attempt5_Status == "Wrong Vendor" | intervention1$Attempt5_Status.1 == "Wrong Vendor" | intervention1$Attempt1_Status == "Wrong Number" | intervention1$Attempt2_Status == "Wrong Number" | intervention1$Attempt3_Status == "Wrong Number" | intervention1$Attempt5_Status == "Wrong Number" | intervention1$Attempt5_Status.1 == "Wrong Number", "callback/no appointment", intervention1$phone_booth)

#table(intervention1$phone_booth, useNA = "ifany")


# Generate indicator for reached but not complied -------------------------

intervention1$kobo <- as.character(intervention1$kobo)
intervention1$phone_booth <- as.character(intervention1$phone_booth)

intervention1$noncompliers <- NA
intervention1$noncompliers <- ifelse(is.na(intervention1$kobo) & intervention1$phone_booth == "callback/no appointment", 1, intervention1$noncompliers)

#table(intervention1$noncompliers, useNA = "ifany")

# Step 1: Drop if intervention1_treatment == "C" 

int1 <- intervention1[ which(intervention1$treatment != "C" & intervention1$noncompliers == 1), ]

# check for duplicates
n_occur <- data.frame(table(int1$SbjNum))
n_occur[n_occur$Freq > 1,]

# Step 2: Block-randomize this sample into three evenly-sized groups: G1) T1, G2) intervention 2 with collective, G3) intervention 2 without collective.  

### block
set.seed(90210)
data_block <- block(data = int1, n.tr = 3,
                    id.vars = c("SbjNum", "s3"), 
                    block.vars = c("market_size", "trust", "friends_mkt", "yoruba", "rent_dummy", "LIRS_connect", "club"), algorithm = "optGreedy")

### assign one member of each blocked unit to treatment/control
assg <- assignment(data_block, seed = 90210)

### assign block identifiers
int1$block_id <- createBlockIDs(data_block, data = int1, id.var = "SbjNum")

#View(int1[, c("SbjNum", "s3", "block_id")])

### add the treatment group number
l <- data.frame(assg[1])
names(l)[c(1:3)] <- c("assg.1_Treatment1", "assg.1_Treatment2", "assg.1_Treatment3")

l.long <- reshape(l, varying = c("assg.1_Treatment1", "assg.1_Treatment2", "assg.1_Treatment3"), 
                  direction = "long", sep = "_")

names(l.long)[c(2:3)] <- c("treatment", "SbjNum")
l.long$group <- NA
l.long$group <- ifelse(l.long$treatment == "Treatment1", "G1", l.long$group) 
l.long$group <- ifelse(l.long$treatment == "Treatment2", "G2", l.long$group)
l.long$group <- ifelse(l.long$treatment == "Treatment3", "G3", l.long$group)

names(l.long)[1] <- "max_dist_block1" # largest of the multivariate distances between all possible pairs in the block
group <- l.long[, c(1, 3, 5)]
group$SbjNum <- as.character(group$SbjNum)

### add assignment to the main dataframe
require(plyr)
block1 <- join(int1, group, by = "SbjNum", type = 'left', match = 'all')

#View(block1[, c("SbjNum", "s3", "block_id", "group")])
#table(block1$group, useNA = "ifany")

names(block1)

# save dataset
save(block1, file = "Treatment Assignment/Output Data/int2_block1_part2.rda")

### calculate balance statistics
block1$group <- as.character(block1$group)

testvar <- names(block1)[names(block1) != "group"]

anovaresults <- lapply(testvar, function(varname){
  
  tryCatch({res.aov <- aov(as.formula(paste(varname, "group", sep = " ~ ")), data = block1)
  
  aov.tukey <- TukeyHSD(res.aov)
  
  resultstable <- data.frame(variable = varname, 
                             comparison = rownames(aov.tukey$group),
                             anova.p.value = summary(res.aov)[[1]][["Pr(>F)"]][1],
                             aov.tukey$group)
  
  }, error=function(e){})
  
})

anovaresults <- do.call(rbind, anovaresults)

anovaresults$p_sig 
anovaresults$p_sig[anovaresults$p.adj < 0.1 & anovaresults$p.adj > 0.05] <- "*" 
anovaresults$p_sig[anovaresults$p.adj < 0.05 & anovaresults$p.adj > 0.001] <- "**" 
anovaresults$p_sig[anovaresults$p.adj < 0.001] <- "***" 

# Step 3: Within G2, block-randomize everyone into T2, T3, T4, and T5 with equal probability

g2 <- block1[ which(block1$group == "G2"), ]

### block
set.seed(90210)
data_block <- block(data = g2, n.tr = 4,
                    id.vars = c("SbjNum", "s3"), 
                    block.vars = c("market_size", "trust", "friends_mkt", "yoruba", "rent_dummy", "LIRS_connect", "club"), algorithm = "optGreedy")

### assign one member of each blocked unit to treatment/control
assg <- assignment(data_block, seed = 90210)

### assign block identifiers
g2$block_id <- createBlockIDs(data_block, data = g2, id.var = "SbjNum")

#View(g2[, c("SbjNum", "s3", "block_id")])

### add the treatment group number
l <- data.frame(assg[1])
names(l)[c(1:4)] <- c("assg.1_Treatment1", "assg.1_Treatment2", "assg.1_Treatment3", "assg.1_Treatment4")

l.long <- reshape(l, varying = c("assg.1_Treatment1", "assg.1_Treatment2", "assg.1_Treatment3", "assg.1_Treatment4"), 
                  direction = "long", sep = "_")

names(l.long)[c(2:3)] <- c("treatment", "SbjNum")
#table(l.long$treatment, useNA = "ifany")

l.long$int2_g2 <- NA
l.long$int2_g2 <- ifelse(l.long$treatment == "Treatment1", "T2", l.long$int2_g2)
l.long$int2_g2 <- ifelse(l.long$treatment == "Treatment2", "T3", l.long$int2_g2)
l.long$int2_g2 <- ifelse(l.long$treatment == "Treatment3", "T4", l.long$int2_g2)
l.long$int2_g2 <- ifelse(l.long$treatment == "Treatment4", "T5", l.long$int2_g2)

names(l.long)[1] <- "max_dist_int2_g2" # largest of the multivariate distances between all possible pairs in the block
g2 <- l.long[, c(1, 3, 5)]
g2$SbjNum <- as.character(g2$SbjNum)

### add assignment to the main dataframe
require(plyr)
block1_g2 <- join(block1, g2, by = "SbjNum", type = 'right', match = 'all')

#View(block1_g2[, c("SbjNum", "s3", "market_size", "group", "int2_g2")])
#table(block1_g2$group, useNA = "ifany") # T2 and T3 empty

g2.assign <- block1_g2[ which(is.na(block1_g2$SbjNum) == F), ]
#table(g2.assign$group, useNA = "ifany")

# save dataset
# this dataset contains treatment assignment for vendors that are in "G2"
save(g2.assign, file = "Treatment Assignment/Output Data/g2_assign_part2.rda")

# Step 4: Within G3, block-randomize everyone into T2, T3, T4, and T5 with equal probability

g3 <- block1[ which(block1$group == "G3"), ]

### block
set.seed(90210)
data_block <- block(data = g3, n.tr = 4,
                    id.vars = c("SbjNum", "s3"), 
                    block.vars = c("market_size", "trust", "friends_mkt", "yoruba", "rent_dummy", "LIRS_connect", "club"), algorithm = "optGreedy")

### assign one member of each blocked unit to treatment/control
assg <- assignment(data_block, seed = 90210)

### assign block identifiers
g3$block_id <- createBlockIDs(data_block, data = g3, id.var = "SbjNum")

#View(g3[, c("SbjNum", "s3", "block_id")])

### add the treatment group number
l <- data.frame(assg[1])
names(l)[c(1:4)] <- c("assg.1_Treatment1", "assg.1_Treatment2", "assg.1_Treatment3", "assg.1_Treatment4")

l.long <- reshape(l, varying = c("assg.1_Treatment1", "assg.1_Treatment2", "assg.1_Treatment3", "assg.1_Treatment4"), 
                  direction = "long", sep = "_")

names(l.long)[c(2:3)] <- c("treatment", "SbjNum")
#table(l.long$treatment, useNA = "ifany")

l.long$int2_g3 <- NA
l.long$int2_g3 <- ifelse(l.long$treatment == "Treatment1", "T2", l.long$int2_g3)
l.long$int2_g3 <- ifelse(l.long$treatment == "Treatment2", "T3", l.long$int2_g3)
l.long$int2_g3 <- ifelse(l.long$treatment == "Treatment3", "T4", l.long$int2_g3)
l.long$int2_g3 <- ifelse(l.long$treatment == "Treatment4", "T5", l.long$int2_g3)

names(l.long)[1] <- "max_dist_int2_g3" # largest of the multivariate distances between all possible pairs in the block
g3 <- l.long[, c(1, 3, 5)]
g3$SbjNum <- as.character(g3$SbjNum)

### add assignment to the main dataframe
require(plyr)
block1_g3 <- join(block1, g3, by = "SbjNum", type = 'right', match = 'all')

#View(block1_g3[, c("SbjNum", "s3", "market_size", "group", "int2_g3")])
#table(block1_g3$group, useNA = "ifany") 

g3.assign <- block1_g3[ which(is.na(block1_g3$SbjNum) == F), ]
#table(g3.assign$group, useNA = "ifany")

# save dataset
# this dataset contains treatment assignment for vendors that are in "G3"
save(g3.assign, file = "Treatment Assignment/Output Data/g3_assign_part2.rda")

### append the datasets g2.assign and g3.assign together
names(g2.assign)[264] <- "max_dist_int2"
names(g3.assign)[264] <- "max_dist_int2"
names(g2.assign)[265] <- "int2_treatment"
names(g3.assign)[265] <- "int2_treatment"

int2_treatment <- rbind(g2.assign, g3.assign)

#View(int2_treatment[, c("SbjNum", "s3", "market_size", "group", "int2_treatment")])

#table(int2_treatment$int2_treatment, useNA = "ifany")
#table(g2.assign$int2_treatment, useNA = "ifany")
#table(g3.assign$int2_treatment, useNA = "ifany")

# save dataset
# this dataset contains intervention 2 treatment assignment for vendors (note: no collective )
save(int2_treatment, file = "Treatment Assignment/Output Data/int2_treatment_part2.rda")


### calculate balance statistics
int2_treatment$int2_treatment <- as.character(int2_treatment$int2_treatment)

testvar <- names(int2_treatment)[names(int2_treatment) != "int2_treatment"]

anovaresults <- lapply(testvar, function(varname){
  
  tryCatch({res.aov <- aov(as.formula(paste(varname, "int2_treatment", sep = " ~ ")), data = int2_treatment)
  
  aov.tukey <- TukeyHSD(res.aov)
  
  resultstable <- data.frame(variable = varname, 
                             comparison = rownames(aov.tukey$int2_treatment),
                             anova.p.value = summary(res.aov)[[1]][["Pr(>F)"]][1],
                             aov.tukey$int2_treatment)
  
  }, error=function(e){})
  
})

anovaresults <- do.call(rbind, anovaresults)

anovaresults$p_sig 
anovaresults$p_sig[anovaresults$p.adj < 0.1 & anovaresults$p.adj > 0.05] <- "*" 
anovaresults$p_sig[anovaresults$p.adj < 0.05 & anovaresults$p.adj > 0.001] <- "**" 
anovaresults$p_sig[anovaresults$p.adj < 0.001] <- "***" 

# Step 5: All vendors in G2 will receive collective treatment and all vendors in G3 will not

# check for duplicates
n_occur <- data.frame(table(int2_treatment$SbjNum))
n_occur[n_occur$Freq > 1,]

#View(int2_treatment[, c("SbjNum", "group", "int2_treatment")])

# assign all observations in "G2" to the collective treatment
int2_treatment$collective <- NA
int2_treatment$collective <- ifelse(int2_treatment$group == "G2", "CA", int2_treatment$collective)

# assign all observations in "G3" to the collective control
int2_treatment$collective <- ifelse(int2_treatment$group == "G3", "No CA", int2_treatment$collective)

# tabulate 
#table(int2_treatment$collective, useNA = "ifany")

# create visit status
int2_treatment$visit_status <- ifelse(int2_treatment$kobo == "yes", "completed", NA)
int2_treatment$visit_status <- ifelse(is.na(int2_treatment$kobo), int2_treatment$phone_booth, int2_treatment$visit_status)

#table(int2_treatment$visit_status, useNA = "ifany")

save(int2_treatment, file = "Treatment Assignment/Output Data/int2_treatment_CA_AllMarkets_part2.rda")


# Generate final treatment assignment dataset -----------------------------

rm(list = ls())

# load the datasets
load("02_Randomization/02_Data/int2_treatment_CA_AllMarkets.rda")

int2_part1 <- int2_treatment

load("02_Randomization/02_Data/int2_treatment_CA_AllMarkets_part2.rda")

int2_part2 <- int2_treatment

### merge two datasets
int2_part1$compliance <- ifelse(int2_part1$compliers, "complier", NA)
int2_part2$compliance <- ifelse(int2_part2$noncompliers, "noncomplier", NA)

#View(int2_part1[, c("compliers", "compliance")])
#View(int2_part2[, c("noncompliers", "compliance")])

int2_part1$compliers <- NULL
int2_part2$noncompliers <- NULL

int2_treatment_assignment_final <- rbind(int2_part1, int2_part2)

#View(int2_treatment_assignment_final[, c("SbjNum", "int2_treatment", "collective", "visit_status", "compliance")])

# tabulate
#table(int2_treatment_assignment_final$int2_treatment, useNA = "ifany")
#table(int2_part1$int2_treatment, useNA = "ifany")
#table(int2_part2$int2_treatment, useNA = "ifany")

#table(int2_treatment_assignment_final$collective, useNA = "ifany")
#table(int2_part1$collective, useNA = "ifany")
#table(int2_part2$collective, useNA = "ifany")

### save the dataset
### this is the FINAL treatment assignment
save(int2_treatment_assignment_final, file = "Treatment Assignment/Output Data/int2_treatment_assignment_final.rda")

